*Data Appendix*
*Creating Table 4 (Patenting analysis):
cd "Directory/Replication files"

*converting ASCII file into stata file:
insheet using pat76_06_pdp.csv
save pat76_06_pdp.dta, replace
clear
insheet using dynass.csv
save dynass.dta, replace
clear
insheet using pdpcohdr.csv
save pdpcohdr.dta, replace
clear
insheet using compustat19702005.csv
save compustat19702005.dta, replace
clear
insheet using gdp_deflator.csv
save gdp_deflator.dta, replace
clear
insheet using CMSdata.csv
save CMSdata.dta, replace
clear
insheet using industry_output.csv
save industry_output.dta, replace
clear
insheet using ExternalFinance.csv
save ExternalFinance.dta, replace
clear

*making Patent Data from NBER patent data:
use pat76_06_pdp.dta, clear //this is all utility patents from NBER
collapse (count)NumPats=patent , by(pdpass appyear) 
rename appyear year
sort pdpass year
merge pdpass using dynass.dta //merge with dynamic assignee file from NBER
tab _merge
keep if _merge==3
drop _merge 
*find appropriate gvkey to assign in each year
gen gvkey=.
forvalue i=1/5 {
	replace gvkey=gvkey`i' if gvkey`i'~=. & year>=begyr`i' & year<=endyr`i'
	}
keep if gvkey~=.
keep gvkey year NumPats 
*sum over multiple pdpass that are for the same gvkey
collapse (sum)NumPats , by (gvkey year) 
sort gvkey year
save PatData.dta, replace 

*financial data from Compustat:
use compustat19702005.dta, clear //from Compustat
destring sic, replace
destring gvkey, replace
drop if sic==.
drop if gvkey==.
rename fyear year
sort year
merge year using gdp_deflator.dta //deflating compustat variables
tab _merge
drop _merge
replace ppent=0 if ppent==.
replace xrd=0 if xrd==.
replace xrd=0 if xrd<0 
replace revt=0 if revt==.
replace revt=0 if revt<0
replace emp=0 if emp==.
rename xrd xrd_orig
rename revt revt_orig
rename ppent ppent_orig
gen xrd=(xrd_orig/gdp_deflator)*100
gen revt=(revt_orig/gdp_deflator)*100
gen ppent=(ppent_orig/gdp_deflator)*100
sort gvkey year
save Compustat_data.dta, replace

*creating the estimation sample:
use PatData.dta, clear
merge 1:1 gvkey year using Compustat_data.dta
tab _merge
drop if _merge==1 //drop those in patent data without data in compustat 
drop _merge
sort gvkey year
merge gvkey using pdpcohdr.dta //from NBER patent data
tab _merge
drop if _merge==2 //identify those compustat firms that we know have zero patents (rather than missing patent data)
drop _merge
drop cusip name firstyr lastyr pdpco pdpseq begyr endyr
gen mtchflg=match~=. //create match flag variable
replace NumPats =0 if mtchflg & NumPats==.
drop match

gen sic3=int(sic/10) 
sort sic3 year
merge sic3 year using industry_output.dta //merging in manufacturing sector industry output
tab _merge
drop if _merge==2
drop _merge
sort sic3
merge sic3 using CMSdata.dta //merging in CMS survey data 
tab _merge
drop if _merge==2
drop _merge
sort sic3
merge sic3 using ExternalFinance //merging measure of external finance dependency at sic3
tab _merge
drop if _merge==2
drop _merge

*variable construction*
gen ln_NumPats=ln(NumPats+1)
gen ln_rd=ln(xrd+1)
gen ln_sales=ln(revt+1)
gen ln_emp=ln(emp+1)
gen ln_ppent=ln(ppent+1)
sort gvkey year
by gvkey: gen ln_lagrd=ln_rd[_n-1]
by gvkey: gen ln_lag2rd=ln_rd[_n-2]
by gvkey: gen ln_lagsales=ln_sales[_n-1] 
by gvkey: gen ln_lagemp=ln_emp[_n-1]
by gvkey: gen ln_lagppe=ln_ppent[_n-1]
gen ln_rgout=ln(rgout+1)
*patent appropriability effectiveness measure:
gen q32product=5 if q32b==1 & q32b!=.
replace q32product=30 if q32b>1 & q32b<=2 & q32b!=.
replace q32product=50 if q32b>2 & q32b<=3 & q32b!=.
replace q32product=75 if q32b>3 & q32b<=4 & q32b!=.
replace q32product=95 if q32b>4 & q32b!=.
sum q32product
gen q33process=5 if q33b==1 & q33b!=.
replace q33process=30 if q33b>1 & q33b<=2 & q33b!=.
replace q33process=50 if q33b>2 & q33b<=3 & q33b!=.
replace q33process=75 if q33b>3 & q33b<=4 & q33b!=.
replace q33process=95 if q33b>4 & q33b!=.
sum q33process
gen pat_effect=(q33process*q46a+q32product*q46b)/(q46a+q46b)
replace pat_effect=pat_effect/100
sum pat_effect, d
*continuous weighted fast measure:
gen fast_cont=(q27a*q46a+q27b*q46b)/(q46a+q46b)
sum fast_cont, d

gen ln_rgoutXpateff=ln_rgout*pat_effect
gen ln_rgoutXfastcont=ln_rgout*fast_cont
gen ln_rgoutXext=ln_rgout*external
keep if year>1974 & year<2003
tab year, gen (yy)
tsset gvkey year
save Patent_analysis, replace //final estimation sample

*Patent analysis*
xtpqml NumPat ln_rgout ln_lagsales ln_lagemp ln_lagppe if fast_cont!=. & ln_lagrd!=. & ln_lag2rd!=., fe i(gvkey)
xtpqml NumPat ln_rgout ln_lagrd ln_lag2rd ln_lagsales ln_lagemp ln_lagppe if fast_cont!=. , fe i(gvkey)
xtpqml NumPat ln_rgout ln_lagrd  ln_lag2rd ln_lagsales ln_lagemp ln_lagppe  yy** if fast_cont!=. , fe i(gvkey)
xtpqml NumPat ln_rgout ln_rgoutXpateff ln_lagrd ln_lag2rd ln_lagsales ln_lagemp ln_lagppe   yy** if fast_cont!=. , fe i(gvkey)
xtpqml NumPat ln_rgout ln_rgoutXfastcont ln_lagrd ln_lag2rd ln_lagsales ln_lagemp ln_lagppe  yy** if fast_cont!=. , fe i(gvkey)
xtpqml NumPat ln_rgout ln_rgoutXpateff ln_rgoutXfastcont ln_lagrd ln_lag2rd ln_lagsales ln_lagemp ln_lagppe  yy** if fast_cont!=. , fe i(gvkey)
xtpqml NumPat ln_rgout ln_rgoutXpateff ln_rgoutXfastcont ln_rgoutXext ln_lagrd ln_lag2rd ln_lagsales ln_lagemp ln_lagppe   yy**  , fe i(gvkey)
xtpqml NumPat ln_rgout ln_rgoutXpateff ln_rgoutXfastcont ln_rgoutXext ln_lagrd ln_lag2rd ln_lagsales ln_lagemp ln_lagppe    , fe i(gvkey)
